## Code that creates table with descriptive statistics for matched and unmatched sample
## Last changed: 2025-05-23

  sink("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Output/TableA1.txt", split=TRUE)

library(PSweight)
library(cobalt)
library(MatchIt)
library(distances)
library(moreparty)
library(party)
library(caret)
library(bonsai)
library(randomForest)
library(tidymodels)
library(tidyverse)
library(rmarkdown)
library(tinytex)
library(tibble)
library(knitr)
library(car)
library(fst)
library(stargazer)
library(pryr)
library(stringr)
library(fst)
library(lubridate)
library(data.table)
library(summarytools)
library(ggplot2)
library(cowplot)
library(ggpubr)
library(haven)
library(broom)
library(fixest)
library(modelsummary)
library(gt)
library(MASS)
library(survey)
library(collapse)
library(readxl)
library(Hmisc)
library(DescTools)
library(ranger)
library(stablelearner)
library(future.apply)
library(lme4)
library(partykit)
library(ggiplot)

# Specify the name of the treatment variable
  tvar <- "T94_90"

# Specify the the birth cohorts to be used for the analysis
  lbyr <- 1942
  ubyr <- 1967

# Specify if only include Swedish citizens 
  citizen <- 1

  # Specify the propensity score specification
  psformula <- formula(T94 ~
                         Kon + DAStod_1982 + DAStod_1983 + DAStod_1984 +
                         DAStod_1985 + DAStod_1986 + DAStod_1987 + DAStod_1988 +
                         DAStod_1989 +
                         DAntBarn_91 + Sambo_91 + InvBak  |
                         SSYK3_90 + SNI2_91 + Kommun_91 +
                         rCARB_1982 + rCARB_1983 + rCARB_1984 + rCARB_1985 +
                         rCARB_1986 + rCARB_1987 + rCARB_1988 + rCARB_1989 +
                         pArbInk_1990 + pArbInk_1991 + UtbAr_91 +
                         FodAr + Nom82 + Nom85 + Nom88 + Nom91)
  
  
  NomVald <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_NomVald.fst", as.data.table=T)
  RTB <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_RTB.fst", as.data.table=T)
  db <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_Nom.fst", as.data.table=T)
  
  db[, T94 := get(tvar)]
  mdb <- db[between(FodAr, lbyr, ubyr) & (SvMedb82_18 >= citizen) & AllaVal == 1,]
  rm(db)
  gc()
  
  # Use logit model to estimate propensity scores
  mdb <- mdb[complete.cases(mdb),]
  tm <- proc.time()
  logmod <- feglm(psformula,  
                  data = mdb, family="binomial")
  proc.time()-tm
  summary(logmod)
  
  # Extract predicted probability (propensity scores)
  if(logmod$nobs == logmod$nobs_origin){ 
    mdb[, pT94 := predict(logmod, type = "response")]
  }else{
    mdb[logmod$obs_selection$obsRemoved, pT94 := predict(logmod, type = "response")]
  }
  
  mdb <- mdb[complete.cases(mdb),]
  
  # Now use the Full matching approach of Sävje et al. to obtain ATT and ATE weights.  
  
  tmp <- proc.time()
  set.seed(7892)
  m.out <- matchit(T94~1, data = mdb,
                   distance = mdb$pT94, method = "quick", estimand = "ATT")
  proc.time()-tmp
  m.data1 <- as.data.table(match.data(m.out))[, .(LopNr, weights)]
  setnames(m.data1, "weights", "gm_att")
  
  mdb <- m.data1[, .(LopNr, gm_att)][mdb, on = "LopNr"]
  gc()
  RTB <- mdb[RTB, on = "LopNr"]
  
  rm(mdb)
  gc()
  
  rm(list=setdiff(ls(), c("NomVald", "RTB", "rtbvar", "Models", "j", "tvar", "lbyr", "ubyr", "citizen", "psformula", "urval")))
  gc()
  

  mdb <- RTB[!is.na(gm_att) & Ar == 1991, 
           .(Kon, Nom91, DAntBarn_91, Sambo_91, 
             UtbAr_91, FodAr, InvBak, pArbInk_1991,
             gm_att, T94, LopNr, InvBak)]

  gc()
  uniqueN(mdb$LopNr)

  mdb[, Nom91 := Nom91*100]
  mdb[, Kvinna := Kon-1 ]

## Use the cobalt package to assess balance
  cvar <- c("FodAr", "Kvinna", "InvBak", "UtbAr_91", "pArbInk_1991", "DAntBarn_91", "Sambo_91", "Nom91")

  bt <- bal.tab(mdb[, ..cvar], treat = mdb[, T94], weights=mdb[, gm_att],
        un = TRUE, s.d.denom="treated", binary="std", disp = c("means", "sds"))
  bt

  destab <- as.data.table(cbind(bt$Balance$M.0.Un, bt$Balance$M.1.Un, bt$Balance$Diff.Un, 
                              bt$Balance$M.0.Adj, bt$Balance$M.1.Adj, bt$Balance$Diff.Adj))

  setnames(destab, c("V1", "V2", "V3", "V4", "V5", "V6"), c("M0_Un", "M1_Un", "SMDUn", "M0_Adj", "M1_Adj", "SMDAdj"))
  varnames <-c("Birth year", 
             "Female", 
             "Immigrant background", 
             "Education (yrs)", 
             "Earnings",  
             "Children",  
             "Partnership",  
             "Nominated")  

  destab[, Var := varnames]
  setcolorder(destab, "Var")

  destab[Var == "Nominated", M0_Un := M0_Un*100]
  destab[Var == "Nominated", M1_Un := M1_Un*100]
  destab[Var == "Nominated", M0_Adj := M0_Adj*100]
  destab[Var == "Nominated", M1_Adj := M1_Adj*100]

  datasummary_df(destab, output = "C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Tables/TableA1.tex", fmt = 3)
  sink()

